In [ ]:
%run "../Functions/1. Google form analysis.ipynb"

In [ ]:
# Localplayerguids of users who answered the questionnaire (see below).
# French
#localplayerguid = 'a4d4b030-9117-4331-ba48-90dc05a7e65a'
#localplayerguid = 'd6826fd9-a6fc-4046-b974-68e50576183f'
#localplayerguid = 'deb089c0-9be3-4b75-9b27-28963c77b10c'
#localplayerguid = '75e264d6-af94-4975-bb18-50cac09894c4'
#localplayerguid = '3d733347-0313-441a-b77c-3e4046042a53'
# English
localplayerguid = '8d352896-a3f1-471c-8439-0f426df901c1'
#localplayerguid = '7037c5b2-c286-498e-9784-9a061c778609'
#localplayerguid = '5c4939b5-425b-4d19-b5d2-0384a515539e'
#localplayerguid = '7825d421-d668-4481-898a-46b51efe40f0'
#localplayerguid = 'acb9c989-b4a6-4c4d-81cc-6b5783ec71d8'
#localplayerguid = devPCID5

'Google form analysis' functions checks

copy-paste for unit tests

(userIDThatDidNotAnswer) (userID1AnswerEN) (userIDAnswersEN) (userID1ScoreEN) (userIDScoresEN) (userID1AnswerFR) (userIDAnswersFR) (userID1ScoreFR) (userIDScoresFR) (userIDAnswersENFR)

getAllResponders


In [ ]:
len(getAllResponders())

hasAnswered


In [ ]:
assert(not hasAnswered( userIDThatDidNotAnswer )), "User has NOT answered"

In [ ]:
assert(hasAnswered( userID1AnswerEN )), "User HAS answered"

In [ ]:
assert(hasAnswered( userIDAnswersEN )), "User HAS answered"

In [ ]:
assert(hasAnswered( userID1AnswerFR )), "User HAS answered"

In [ ]:
assert(hasAnswered( userIDAnswersFR )), "User HAS answered"

In [ ]:
assert(hasAnswered( userIDAnswersENFR )), "User HAS answered"

getAnswers


In [ ]:
assert (len(getAnswers( userIDThatDidNotAnswer ).columns) == 0),"Too many answers"

In [ ]:
assert (len(getAnswers( userID1AnswerEN ).columns) == 1),"Too many answers"

In [ ]:
assert (len(getAnswers( userIDAnswersEN ).columns) >= 2),"Not enough answers"

In [ ]:
assert (len(getAnswers( userID1AnswerFR ).columns) == 1),"Not enough columns"

In [ ]:
assert (len(getAnswers( userIDAnswersFR ).columns) >= 2),"Not enough answers"

In [ ]:
assert (len(getAnswers( userIDAnswersENFR ).columns) >= 2),"Not enough answers"

getCorrections


In [ ]:
assert (len(getCorrections( userIDThatDidNotAnswer ).columns) == 0),"Too many answers"

In [ ]:
assert (len(getCorrections( userID1AnswerEN ).columns) == 2),"Too many answers"

In [ ]:
assert (len(getCorrections( userIDAnswersEN ).columns) >= 4),"Not enough answers"

In [ ]:
assert (len(getCorrections( userID1AnswerFR ).columns) == 2),"Too many answers"

In [ ]:
assert (len(getCorrections( userIDAnswersFR ).columns) >= 4),"Not enough answers"

In [ ]:
assert (len(getCorrections( userIDAnswersENFR ).columns) >= 4),"Not enough answers"

getScore


In [ ]:


In [ ]:
assert (len(pd.DataFrame(getScore( userIDThatDidNotAnswer ).values.flatten().tolist()).values.flatten().tolist()) == 0),"Too many answers"

In [ ]:
score = getScore( userID1AnswerEN )
#print(score)
assert (
    (len(score.values.flatten()) == 3)
    and
    score['before'][0][0] == 23
),"Incorrect score"

In [ ]:
score = getScore( userIDAnswersEN )
#print(score)
assert (
    (len(score.values.flatten()) == 3)
    and
    score['before'][0][0] == 5
    and
    score['after'][0][0] == 25
),"Incorrect score"

In [ ]:
score = getScore( userID1AnswerFR )
#print(score)
assert (
    (len(score.values.flatten()) == 3)
    and
    score['before'][0][0] == 23
),"Incorrect score"

In [ ]:
score = getScore( userIDAnswersFR )
#print(score)
assert (
    (len(score.values.flatten()) == 3)
    and
    score['before'][0][0] == 15
    and
    score['after'][0][0] == 26
),"Incorrect score"

In [ ]:
score = getScore( userIDAnswersENFR )
#print(score)
assert (
    (len(score.values.flatten()) == 3)
    and
    score['before'][0][0] == 4
    and
    score['after'][0][0] == 13
),"Incorrect score"

getValidatedCheckpoints


In [ ]:
objective = 0
assert (len(getValidatedCheckpoints( userIDThatDidNotAnswer )) == objective),"Incorrect number of answers"

In [ ]:
objective = 1
assert (len(getValidatedCheckpoints( userID1AnswerEN )) == objective),"Incorrect number of answers"

In [ ]:
assert (getValidatedCheckpoints( userID1AnswerEN )[0].equals(validableCheckpoints)) \
    , "User has validated everything"

In [ ]:
objective = 2
assert (len(getValidatedCheckpoints( userIDAnswersEN )) == objective),"Incorrect number of answers"

In [ ]:
# Expected length of the first element ([0]) returned by getValidatedCheckpoints
# for userIDAnswersEN.
objective = 3
# The message is only evaluated when the assertion fails; objective is an int, so it
# has to be converted explicitly, otherwise the failure surfaces as a TypeError
# instead of an AssertionError carrying this message.
assert (len(getValidatedCheckpoints( userIDAnswersEN )[0]) == objective) \
    , "User has validated " + str(objective) + " chapters on first try"

In [ ]:
objective = 1
assert (len(getValidatedCheckpoints( userID1AnswerFR )) == objective),"Incorrect number of answers"

In [ ]:
assert (getValidatedCheckpoints( userID1AnswerFR )[0].equals(validableCheckpoints)) \
    , "User has validated everything"

In [ ]:
objective = 2
assert (len(getValidatedCheckpoints( userIDAnswersFR )) == objective),"Incorrect number of answers"

In [ ]:
# Expected length of the second element ([1]) returned by getValidatedCheckpoints
# for userIDAnswersFR.
objective = 5
# str() is required: the message is built only on failure, and concatenating the
# int directly would raise a TypeError that hides the real assertion failure.
assert (len(getValidatedCheckpoints( userIDAnswersFR )[1]) == objective) \
    , "User has validated " + str(objective) + " chapters on second try"

In [ ]:
objective = 2
assert (len(getValidatedCheckpoints( userIDAnswersENFR )) == objective),"Incorrect number of answers"

In [ ]:
# Expected length of the second element ([1]) returned by getValidatedCheckpoints
# for userIDAnswersENFR.
objective = 5
# str() is required: the message is built only on failure, and concatenating the
# int directly would raise a TypeError that hides the real assertion failure.
assert (len(getValidatedCheckpoints( userIDAnswersENFR )[1]) == objective) \
    , "User has validated " + str(objective) + " chapters on second try"

getNonValidated


In [ ]:
getValidatedCheckpoints( userIDThatDidNotAnswer )

In [ ]:
pd.Series(getValidatedCheckpoints( userIDThatDidNotAnswer ))

In [ ]:
type(getNonValidated(pd.Series(getValidatedCheckpoints( userIDThatDidNotAnswer ))))

In [ ]:
validableCheckpoints

In [ ]:
assert(getNonValidated(getValidatedCheckpoints( userIDThatDidNotAnswer ))).equals(validableCheckpoints), \
"incorrect validated checkpoints: should contain all checkpoints that can be validated"

In [ ]:
# Hand-built input for getNonValidated: four empty entries followed by checkpoint
# names, some of them repeated. The series is wrapped in an outer one-element Series
# before the call, and the first entry of the first result is expected to be
# 'tutorial1.Checkpoint13'.
# NOTE(review): the trailing numbers presumably are question indices — TODO confirm.
testSeries = pd.Series( 
    [
            '', # 7
            '', # 8
            '', # 9
            '', # 10

            'tutorial1.Checkpoint00', # 11
            'tutorial1.Checkpoint00', # 12
            'tutorial1.Checkpoint00', # 13
            'tutorial1.Checkpoint00', # 14
            'tutorial1.Checkpoint02', # 15
            'tutorial1.Checkpoint01', # 16
            'tutorial1.Checkpoint05'
        ]
    )
assert(getNonValidated(pd.Series([testSeries]))[0][0] == 'tutorial1.Checkpoint13'), "Incorrect non validated checkpoint"

getNonValidatedCheckpoints


In [ ]:
getNonValidatedCheckpoints( userIDThatDidNotAnswer )

In [ ]:
getNonValidatedCheckpoints( userID1AnswerEN )

In [ ]:
getNonValidatedCheckpoints( userIDAnswersEN )

In [ ]:
getNonValidatedCheckpoints( userID1AnswerFR )

In [ ]:
getNonValidatedCheckpoints( userIDAnswersFR )

In [ ]:
getNonValidatedCheckpoints( userIDAnswersENFR )

getValidatedCheckpointsCounts


In [ ]:
getValidatedCheckpointsCounts(userIDThatDidNotAnswer)
getValidatedCheckpointsCounts(userID1AnswerEN)
getValidatedCheckpointsCounts(userIDAnswersEN)
getValidatedCheckpointsCounts(userID1ScoreEN)
getValidatedCheckpointsCounts(userIDScoresEN)
getValidatedCheckpointsCounts(userID1AnswerFR)
getValidatedCheckpointsCounts(userIDAnswersFR)
getValidatedCheckpointsCounts(userID1ScoreFR)
getValidatedCheckpointsCounts(userIDScoresFR)
getValidatedCheckpointsCounts(userIDAnswersENFR)

getNonValidatedCheckpointsCounts


In [ ]:
getNonValidatedCheckpointsCounts(userIDThatDidNotAnswer)
getNonValidatedCheckpointsCounts(userID1AnswerEN)
getNonValidatedCheckpointsCounts(userIDAnswersEN)
getNonValidatedCheckpointsCounts(userID1ScoreEN)
getNonValidatedCheckpointsCounts(userIDScoresEN)
getNonValidatedCheckpointsCounts(userID1AnswerFR)
getNonValidatedCheckpointsCounts(userIDAnswersFR)
getNonValidatedCheckpointsCounts(userID1ScoreFR)
getNonValidatedCheckpointsCounts(userIDScoresFR)
getNonValidatedCheckpointsCounts(userIDAnswersENFR)

getAllAnswerRows


In [ ]:
# Answer groups (English and French wordings) and question column indexes used by the
# getAllAnswerRows / getPercentCorrectKnowingAnswer checks below.
aYes = ["Yes", "Oui"]
aNo = ["No", "Non"]
aNoIDK = ["No", "Non", "I don't know", "Je ne sais pas"]

# How long have you studied biology?
qBiologyEducationLevelIndex = 5
aBiologyEducationLevelHigh = ["Until bachelor's degree", "Jusqu'à la license"]
# One entry per line group, every entry followed by a comma: a missing comma makes
# Python concatenate two adjacent literals into a single, never-matching answer.
aBiologyEducationLevelLow = [
    'Until the end of high school', 'Until the end of middle school', 'Not even in middle school',
    "Jusqu'au bac", "Jusqu'au brevet", 'Jamais',
]
# Have you ever heard about BioBricks?
qHeardBioBricksIndex = 8
# Have you played the current version of Hero.Coli?
qPlayedHerocoliIndex = 10
qPlayedHerocoliYes = ['Yes', 'Once', 'Multiple times', 'Oui',
       'De nombreuses fois', 'Quelques fois', 'Une fois']
qPlayedHerocoliNo = ['No', 'Non',]

In [ ]:
gform['How long have you studied biology?'].unique()

In [ ]:
gform['Before playing Hero.Coli, had you ever heard about BioBricks?'].unique()

In [ ]:
gform['Have you played the current version of Hero.Coli?'].unique()

In [ ]:
getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)

In [ ]:
assert(len(getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)) != 0)

In [ ]:
assert(len(getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelLow)) != 0)

In [ ]:
assert(len(getAllAnswerRows(qHeardBioBricksIndex, aYes)) != 0)

In [ ]:
assert(len(getAllAnswerRows(qHeardBioBricksIndex, aNoIDK)) != 0)

In [ ]:
assert(len(getAllAnswerRows(qPlayedHerocoliIndex, qPlayedHerocoliYes)) != 0)

In [ ]:
assert(len(getAllAnswerRows(qPlayedHerocoliIndex, qPlayedHerocoliNo)) != 0)

getPercentCorrectPerColumn

tested through getPercentCorrectKnowingAnswer

getPercentCorrectKnowingAnswer


In [ ]:
questionIndex = 15
gform.iloc[:, questionIndex].head()

In [ ]:
(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)

In [ ]:
getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)

In [ ]:
getPercentCorrectKnowingAnswer(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)

In [ ]:
getPercentCorrectKnowingAnswer(qBiologyEducationLevelIndex, aBiologyEducationLevelLow)

In [ ]:
getPercentCorrectKnowingAnswer(qHeardBioBricksIndex, aYes)

In [ ]:
getPercentCorrectKnowingAnswer(qHeardBioBricksIndex, aNoIDK)

In [ ]:
playedHerocoliIndexYes = getPercentCorrectKnowingAnswer(qPlayedHerocoliIndex, qPlayedHerocoliYes)
playedHerocoliIndexYes

In [ ]:
playedHerocoliIndexNo = getPercentCorrectKnowingAnswer(qPlayedHerocoliIndex, qPlayedHerocoliNo)
playedHerocoliIndexNo

In [ ]:
playedHerocoliIndexYes - playedHerocoliIndexNo

In [ ]:
(playedHerocoliIndexYes - playedHerocoliIndexNo) / (1 - playedHerocoliIndexNo)

Google form loading


In [ ]:
#gform = gformEN

In [ ]:
transposed = gform.T
#answers = transposed[transposed[]]
transposed

In [ ]:
type(gform)

Selection of a question


In [ ]:
gform.columns

In [ ]:
gform.columns.get_loc('Do not edit -  pre-filled anonymous ID')

In [ ]:
localplayerguidkey

In [ ]:
# Using the whole question:
gform[localplayerguidkey]

In [ ]:
# Get index from question
localplayerguidindex

In [ ]:
# Using the index of the question:
gform.iloc[:, localplayerguidindex]

Selection of a user's answers

userIDThatDidNotAnswer
userID1AnswerEN
userIDAnswersEN
userID1AnswerFR
userIDAnswersFR
userIDAnswersENFR

getUniqueUserCount tinkering


In [ ]:
sample = gform

#def getUniqueUserCount(sample):
sample[localplayerguidkey].nunique()

getAllRespondersGFormGUID tinkering


In [ ]:
userIds = gform[localplayerguidkey].unique()
len(userIds)

getRandomGFormGUID tinkering


In [ ]:
# Sanity check on the responder list: print the raw count and the number of distinct
# values, then report every distinct value rejected by isGUIDFormat.
allResponders = getAllResponders()
uniqueUsers = np.unique(allResponders)
print(len(allResponders))
print(len(uniqueUsers))
for guid in uniqueUsers:
    if(not isGUIDFormat(guid)):
        print('incorrect guid: ' + str(guid))

In [ ]:
# Draw responders at random until the drawn value passes isGUIDFormat.
# NOTE: despite its name, uniqueUsers holds the raw getAllResponders() result here;
# this cell does not deduplicate it.
uniqueUsers = getAllResponders()
userCount = len(uniqueUsers)
# starting value, presumably rejected by isGUIDFormat so that the loop body runs at least once
guid = '0'
# NOTE(review): this loop never terminates if no responder has a valid GUID format.
while (not isGUIDFormat(guid)):
    # NOTE(review): randint is defined outside this cell; whether the upper bound is
    # inclusive depends on which randint it is — confirm that the last responder can be drawn.
    userIndex = randint(0,userCount-1)
    guid = uniqueUsers[userIndex]
guid

getAnswers tinkering


In [ ]:
# Inline draft of getAnswers: select the form rows of one user and present them as
# columns (one column per submitted form, one row per question).
#userId = userIDThatDidNotAnswer
#userId = userID1AnswerEN
userId = userIDAnswersEN

_form = gform

#def getAnswers( userId, _form = gform ):
# rows of the form whose GUID column equals userId
answers = _form[_form[localplayerguidkey]==userId]
# transposed view: the former row labels become the column labels
_columnAnswers = answers.T

if 0 != len(answers):
    # rename each column to answersColumnNameStem followed by the original row label
    _newColumns = []
    for column in _columnAnswers.columns:
        _newColumns.append(answersColumnNameStem + str(column))
    _columnAnswers.columns = _newColumns
else:
    # user has never answered
    print("user " + str(userId) + " has never answered")

_columnAnswers

answer selection


In [ ]:
answers

In [ ]:
# Selection of a specific answer
answers.iloc[:,localplayerguidindex]

In [ ]:
answers.iloc[:,localplayerguidindex].iloc[0]

In [ ]:
type(answers.iloc[0,:])

In [ ]:
answers.iloc[0,:].values

checking answers


In [ ]:
#### Question that has a correct answer:

In [ ]:
questionIndex = 15

In [ ]:
answers.iloc[:,questionIndex].iloc[0]

In [ ]:
correctAnswers.iloc[questionIndex][0]

In [ ]:
answers.iloc[:,questionIndex].iloc[0].startswith(correctAnswers.iloc[questionIndex][0])

In [ ]:
#### Question that has no correct answer:

In [ ]:
questionIndex = 0
#answers.iloc[:,questionIndex].iloc[0].startswith(correctAnswers.iloc[questionIndex].iloc[0])

In [ ]:
#### Batch check:

In [ ]:
columnAnswers = getAnswers( userId )

In [ ]:
columnAnswers.values[2,0]

In [ ]:
columnAnswers[columnAnswers.columns[0]][2]

In [ ]:
correctAnswers

In [ ]:
type(columnAnswers)

In [ ]:
indexOfFirstEvaluationQuestion = 13
columnAnswers.index[indexOfFirstEvaluationQuestion]

getTemporality tinkering


In [ ]:
gform.tail(50)

In [ ]:
gform[gform[localplayerguidkey] == 'ba202bbc-af77-42e8-85ff-e25b871717d5']

In [ ]:
gformRealBefore = gform.loc[88, 'Timestamp']
gformRealBefore

In [ ]:
gformRealAfter = gform.loc[107, 'Timestamp']
gformRealAfter

In [ ]:
RMRealFirstEvent = getFirstEventDate(gform.loc[88,localplayerguidkey])
RMRealFirstEvent

getTemporality tinkering


In [ ]:
# Inline draft of getTemporality: classify an answer date relative to the date of the
# user's first game event.
tzAnswerDate = gformRealBefore
gameEventDate = RMRealFirstEvent

#def getTemporality( answerDate, gameEventDate ):
# Default to the third temporality (presumably "undefined" — TODO confirm); it is kept
# when the game event date is the Timestamp.max sentinel, or when neither comparison
# below holds.
result = answerTemporalities[2]
if(gameEventDate != pd.Timestamp.max.tz_localize('utc')):
    # Compare the date actually defined in this cell (tzAnswerDate); the draft
    # previously read `answerDate`, which no cell in view defines.
    if(tzAnswerDate <= gameEventDate):
        result = answerTemporalities[0]
    elif (tzAnswerDate > gameEventDate):
        result = answerTemporalities[1]
result, tzAnswerDate, gameEventDate

In [ ]:


In [ ]:
firstEventDate = getFirstEventDate(gform.loc[userIndex,localplayerguidkey])
firstEventDate

In [ ]:
gformTestBefore = pd.Timestamp('2018-01-16 14:28:20.998000+0000', tz='UTC')
getTemporality(gformTestBefore,firstEventDate)

In [ ]:
gformTestWhile = pd.Timestamp('2018-01-16 14:28:23.998000+0000', tz='UTC')
getTemporality(gformTestWhile,firstEventDate)

In [ ]:
gformTestAfter = pd.Timestamp('2018-01-16 14:28:24.998000+0000', tz='UTC')
getTemporality(gformTestAfter,firstEventDate)

getTestAnswers tinkering


In [ ]:
_form = gform
_rmDF = rmdf152
_rmTestDF = normalizedRMDFTest
includeAndroid = True

#def getTestAnswers( _form = gform, _rmDF = rmdf152, _rmTestDF = normalizedRMDFTest, includeAndroid = True):
_form[_form[localplayerguidkey].isin(testUsers)]

In [ ]:
_form[localplayerguidkey]

In [ ]:
testUsers

In [ ]:
len(getTestAnswers()[localplayerguidkey])

In [ ]:
rmdf152['customData.platform'].unique()

In [ ]:
rmdf152[rmdf152['customData.platform'].apply(lambda s: str(s).endswith('editor'))]

In [ ]:
rmdf152[rmdf152['userId'].isin(getTestAnswers()[localplayerguidkey])][['userTime','customData.platform','userId']].dropna()

getCorrections tinkering


In [ ]:
columnAnswers

In [ ]:
#testUserId = userID1AnswerEN
testUserId = '8d352896-a3f1-471c-8439-0f426df901c1'

In [ ]:
getCorrections(testUserId)

In [ ]:
# Inline draft of getCorrections: for every answers column of one user, add a matching
# corrections column holding True/False for questions that have correct answers in
# `source`, and NaN for the others.
testUserId = '8d352896-a3f1-471c-8439-0f426df901c1'
source = correctAnswers

#def getCorrections( _userId, _source = correctAnswers, _form = gform ):
columnAnswers = getAnswers( testUserId )

if 0 != len(columnAnswers.columns):

    # kept as a notebook global: later exploratory cells display it
    questionsCount = len(columnAnswers.values)

    # columnAnswers.columns is evaluated once, so the correction columns added inside
    # the loop are not iterated over
    for columnName in columnAnswers.columns:
        if answersColumnNameStem in columnName:
            answerNumber = columnName.replace(answersColumnNameStem,"")
            newCorrectionsColumnName = correctionsColumnNameStem + answerNumber

            # Start from an all-NaN column. Assigning the scalar states this directly;
            # the draft first copied the answers and then overwrote them with an
            # integer-indexed NaN Series that was NaN only through index misalignment.
            columnAnswers[newCorrectionsColumnName] = np.nan

            for question in columnAnswers[columnName].index:
                #print()
                #print(question)
                __correctAnswers = source.loc[question]

                # only questions that have at least one correct answer are graded
                if(len(__correctAnswers) > 0):
                    columnAnswers.loc[question,newCorrectionsColumnName] = False
                    for correctAnswer in __correctAnswers:
                        #print("-> " + correctAnswer)
                        # prefix match: the answer is correct as soon as it starts
                        # with one of the accepted answers
                        if str(columnAnswers.loc[question,columnName])\
                        .startswith(str(correctAnswer)):
                            columnAnswers.loc[question,newCorrectionsColumnName] = True
                            break


else:
    # user has never answered
    print("can't give correct answers")
columnAnswers

In [ ]:
question = 'How old are you?'
columnName = ''
for column in columnAnswers.columns:
    if str.startswith(column, 'answers'):
        columnName = column
        break

In [ ]:
type(columnAnswers.loc[question,columnName])

In [ ]:
getCorrections(localplayerguid)

In [ ]:
gform.columns[20]

In [ ]:
columnAnswers.loc[gform.columns[20],columnAnswers.columns[1]]

In [ ]:
columnAnswers[columnAnswers.columns[1]][gform.columns[13]]

In [ ]:
columnAnswers.loc[gform.columns[13],columnAnswers.columns[1]]

In [ ]:
columnAnswers.iloc[20,1]

In [ ]:
questionsCount

In [ ]:
np.full(3, np.nan)

In [ ]:
pd.Series(np.full(questionsCount, np.nan))

In [ ]:
columnAnswers.loc[question,newCorrectionsColumnName]

In [ ]:
question

In [ ]:
correctAnswers[question]

In [ ]:
getCorrections('8d352896-a3f1-471c-8439-0f426df901c1')

getCorrections extensions tinkering


In [ ]:
correctAnswersEN
#demographicAnswersEN
type([])

In [ ]:
mergedCorrectAnswersEN = correctAnswersEN.copy()
for index in mergedCorrectAnswersEN.index:
    #print(str(mergedCorrectAnswersEN.loc[index,column]))
    mergedCorrectAnswersEN.loc[index] =\
    demographicAnswersEN.loc[index] + mergedCorrectAnswersEN.loc[index]
mergedCorrectAnswersEN

In [ ]:
correctAnswersEN + demographicAnswersEN

In [ ]:
correctAnswers + demographicAnswers

getBinarizedCorrections tinkering


In [ ]:
corrections = getCorrections(userIDAnswersENFR)
#corrections

In [ ]:
# Rewrite the boolean corrections as 1/0 directly in `corrections`; cells that compare
# equal to neither True nor False (such as NaN) are left untouched.
for columnName in corrections.columns:
    # only the correction columns are rewritten, the other columns are kept as-is
    if correctionsColumnNameStem in columnName:
        for index in corrections[columnName].index:
                if(True==corrections.loc[index,columnName]):
                    corrections.loc[index,columnName] = 1
                elif (False==corrections.loc[index,columnName]):
                    corrections.loc[index,columnName] = 0
corrections

In [ ]:
binarized = getBinarizedCorrections(corrections)
binarized

In [ ]:
slicedBinarized = binarized[13:40]
slicedBinarized

In [ ]:
slicedBinarized =\
binarized[13:40][binarized.columns[\
binarized.columns.to_series().str.contains(correctionsColumnNameStem)\
                           ]]
slicedBinarized

getBinarized tinkering


In [ ]:
_source = correctAnswers
_userId = getRandomGFormGUID()
getCorrections(_userId, _source=_source, _form = gform)

In [ ]:
_userId = '5e978fb3-316a-42ba-bb58-00856353838d'
gform[gform[localplayerguidkey] == _userId].iloc[0].index

In [ ]:
_gformLine = gform[gform[localplayerguidkey] == _userId].iloc[0]
_gformLine.loc['Before playing Hero.Coli, had you ever heard about synthetic biology?']

In [ ]:
# Inline draft of getBinarized: grade a single form row (the first one found for
# _userId) as 1/0 per question, keeping only the questions that have correct answers.
_gformLine = gform[gform[localplayerguidkey] == _userId].iloc[0]

# only for one user
# def getBinarized(_gformLine, _source = correctAnswers):
# labels of the questions for which _source lists at least one accepted answer
_notEmptyIndexes = []
for _index in _source.index:
    if(len(_source.loc[_index]) > 0):
        _notEmptyIndexes.append(_index)

# one NaN per question of the form row; graded questions are overwritten below
_binarized = pd.Series(np.full(len(_gformLine.index), np.nan), index = _gformLine.index)
        
for question in _gformLine.index:
    _correctAnswers = _source.loc[question]

    if(len(_correctAnswers) > 0):
        # wrong until proven right
        _binarized[question] = 0
        for _correctAnswer in _correctAnswers:
            # prefix match against each accepted answer
            if str(_gformLine.loc[question])\
            .startswith(str(_correctAnswer)):
                _binarized.loc[question] = 1
                break

# drop the questions that cannot be graded
_slicedBinarized = _binarized.loc[_notEmptyIndexes]

_slicedBinarized

In [ ]:
_slicedBinarized.loc['What are BioBricks and devices?']

getAllBinarized tinkering


In [ ]:
allBinarized = getAllBinarized()

In [ ]:
plotCorrelationMatrix(allBinarized)

In [ ]:
source

In [ ]:
source = correctAnswers + demographicAnswers
notEmptyIndexes = []
for eltIndex in source.index:
    #print(eltIndex)
    if(len(source.loc[eltIndex]) > 0):
        notEmptyIndexes.append(eltIndex)
len(source)-len(notEmptyIndexes)

In [ ]:
emptyForm = gform[gform[localplayerguidkey] == 'incorrectGUID']

In [ ]:
emptyForm

In [ ]:
# Inline draft of getAllBinarized: build one row per corrected answer set (all
# responders of _form) and one column per question that has accepted answers.
_source = correctAnswers + demographicAnswers
_form = gform #emptyForm

#def getAllBinarized(_source = correctAnswers, _form = gform ):
# labels of the questions for which _source lists at least one accepted answer
_notEmptyIndexes = []
for _index in _source.index:
    if(len(_source.loc[_index]) > 0):
        _notEmptyIndexes.append(_index)

# Collect the per-user slices and concatenate once: calling pd.concat inside the loop
# copies the accumulated frame at every iteration (quadratic in the number of users).
# The leading empty frame keeps the question index when there is no responder at all.
_slices = [pd.DataFrame(index = _notEmptyIndexes)]
for _userId in getAllResponders( _form = _form ):
    _corrections = getCorrections(_userId, _source=_source, _form = _form)
    _binarized = getBinarizedCorrections(_corrections)
    # keep only the gradable questions and only the correction columns
    _slicedBinarized =\
_binarized.loc[_notEmptyIndexes][_binarized.columns[\
_binarized.columns.to_series().str.contains(correctionsColumnNameStem)\
                                   ]]

    _slices.append(_slicedBinarized)

_result = pd.concat(_slices, axis=1)

# rows = answer sets, columns = questions
_result = _result.T

#_result

In [ ]:
if(_result.shape[0] > 0 and _result.shape[1] > 0):
    correlation = _result.astype(float).corr()
    #plt.matshow(correlation)
    sns.clustermap(correlation,cmap=plt.cm.jet,square=True,figsize=(10,10))

In [ ]:
#ax = sns.clustermap(correlation,cmap=plt.cm.jet,square=True,figsize=(10,10),cbar_kws={\
#"orientation":"vertical"})

In [ ]:
# Compare the correlation matrices obtained with the first three entries of `methods`
# (defined outside this cell; presumably pearson, kendall, spearman in that order,
# as the variable names suggest — TODO confirm).
# NOTE(review): the cell that builds _result already transposes it, so _result.T puts
# the questions back on the rows and .corr() here correlates answer sets, not
# questions — confirm this is intended.
correlation_pearson = _result.T.astype(float).corr(methods[0])
correlation_kendall = _result.T.astype(float).corr(methods[1])
correlation_spearman = _result.T.astype(float).corr(methods[2])
print(correlation_pearson.equals(correlation_kendall))
print(correlation_kendall.equals(correlation_spearman))
# keep only the coefficients where the first matrix exceeds the second by more than 0.1
diff = (correlation_pearson - correlation_kendall)
flattened = diff[diff > 0.1].values.flatten()
# the boolean mask above turns every other cell into NaN; drop those
flattened[~np.isnan(flattened)]

In [ ]:
correlation

plotCorrelationMatrix tinkering


In [ ]:
# Axis labels for the 27 scientific questions, numbered #1..#27.
# The draft first assigned gform.columns[13:40] to scientificQuestionsLabels and then
# immediately overwrote it with the literal list below; the dead assignment is dropped.

# Labels with the question number as a suffix (used on the y axis).
scientificQuestionsLabels = [
'In order to modify the abilities of the bacterium, you have to... #1',
'What are BioBricks and devices? #2',
'What is the name of this BioBrick? #3',
'What is the name of this BioBrick?.1 #4',
'What is the name of this BioBrick?.2 #5',
'What is the name of this BioBrick?.3 #6',
'What does this BioBrick do? #7',
'What does this BioBrick do?.1 #8',
'What does this BioBrick do?.2 #9',
'What does this BioBrick do?.3 #10',
'Pick the case where the BioBricks are well-ordered: #11',
'When does green fluorescence happen? #12',
'What happens when you unequip the movement device? #13',
'What is this? #14',
'What does this device do? #15',
'What does this device do?.1 #16',
'What does this device do?.2 #17',
'What does this device do?.3 #18',
'What does this device do?.4 #19',
'What does this device do?.5 #20',
'What does this device do?.6 #21',
'What does this device do?.7 #22',
'Guess: what would a device producing l-arabinose do, if it started with a l-arabinose-induced promoter? #23',
'Guess: the bacterium would glow yellow... #24',
'What is the species of the bacterium of the game? #25',
'What is the scientific name of the tails of the bacterium? #26',
'Find the antibiotic: #27',
]

# Same labels with the question number as a prefix (used on the x axis, where the
# labels are drawn vertically).
scientificQuestionsLabelsX = [
'#1 In order to modify the abilities of the bacterium, you have to...',
'#2 What are BioBricks and devices?',
'#3 What is the name of this BioBrick?',
'#4 What is the name of this BioBrick?.1',
'#5 What is the name of this BioBrick?.2',
'#6 What is the name of this BioBrick?.3',
'#7 What does this BioBrick do?',
'#8 What does this BioBrick do?.1',
'#9 What does this BioBrick do?.2',
'#10 What does this BioBrick do?.3',
'#11 Pick the case where the BioBricks are well-ordered:',
'#12 When does green fluorescence happen?',
'#13 What happens when you unequip the movement device?',
'#14 What is this?',
'#15 What does this device do?',
'#16 What does this device do?.1',
'#17 What does this device do?.2',
'#18 What does this device do?.3',
'#19 What does this device do?.4',
'#20 What does this device do?.5',
'#21 What does this device do?.6',
'#22 What does this device do?.7',
'#23 Guess: what would a device producing l-arabinose do, if it started with a l-arabinose-induced promoter?',
'#24 Guess: the bacterium would glow yellow...',
'#25 What is the species of the bacterium of the game?',
'#26 What is the scientific name of the tails of the bacterium?',
'#27 Find the antibiotic:',
]

In [ ]:
# Draw the correlation matrix with matshow and label both axes with the numbered
# question labels.
questionsLabels = scientificQuestionsLabels
questionsLabelsX = scientificQuestionsLabelsX


fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)

ax.matshow(correlation)
# Fix the tick positions BEFORE assigning the labels: labels are attached to the ticks
# that exist at call time, so setting them on the default ticks of an empty axes and
# moving the ticks afterwards is fragile (recent matplotlib warns about it).
# One extra tick at -1 pairs with the leading '' label.
ax.set_xticks(np.arange(-1,len(questionsLabels),1.));
ax.set_yticks(np.arange(-1,len(questionsLabels),1.));
ax.set_yticklabels(['']+questionsLabels)
ax.set_xticklabels(['']+questionsLabelsX, rotation='vertical');

In [ ]:
# Suffix every question label of the correlation matrix with its 1-based position and
# use the result for both the columns and the index of a renamed copy.
questionsLabels = correlation.columns.copy()
newLabels = []
for index, label in enumerate(questionsLabels):
    numberedLabel = label + ' #' + str(index + 1)
    newLabels.append(numberedLabel)
correlationRenamed = correlation.copy()
correlationRenamed.columns = newLabels
correlationRenamed.index = newLabels
correlationRenamed

In [ ]:
correlationRenamed = correlation.copy()
correlationRenamed.columns = pd.Series(correlation.columns).apply(lambda x: x + ' #' + str(correlation.columns.get_loc(x) + 1))
correlationRenamed.index = correlationRenamed.columns
correlationRenamed

In [ ]:
correlation.shape

In [ ]:
fig = plt.figure(figsize=(10,10))
ax12 = plt.subplot(111)
ax12.set_title('Heatmap')
sns.heatmap(correlation,ax=ax12,cmap=plt.cm.jet,square=True)

In [ ]:
ax = sns.clustermap(correlation,cmap=plt.cm.jet,square=True,figsize=(10,10),cbar_kws={\
"orientation":"vertical"})

In [ ]:
# Draw the correlation matrix with imshow, one tick per question, labelled with the
# question text followed by its 1-based position.
questionsLabels = pd.Series(correlation.columns).apply(lambda x: x + ' #' + str(correlation.columns.get_loc(x) + 1))

fig = plt.figure(figsize=(10,10))
ax = plt.subplot(111)

cmap=plt.cm.jet
#cmap=plt.cm.ocean
cax = ax.imshow(correlation, interpolation='nearest', cmap=cmap,
#    extent=(0.5,np.shape(correlation)[0]+0.5,0.5,np.shape(correlation)[1]+0.5)
               )
#ax.grid(True)
plt.title('Questions\' Correlations')
# Fix the tick positions BEFORE assigning the labels, so that there is exactly one
# tick per label when the labels (and the rotation) are applied; labelling the
# automatic ticks first and moving them afterwards is fragile and triggers a warning
# in recent matplotlib versions.
ax.set_xticks(np.arange(len(questionsLabels)));
ax.set_yticks(np.arange(len(questionsLabels)));
ax.set_yticklabels(questionsLabels)
ax.set_xticklabels(questionsLabels, rotation='vertical')
#ax.set_xticks(np.arange(-1,len(questionsLabels),1.));
#ax.set_yticks(np.arange(-1,len(questionsLabels),1.));

fig.colorbar(cax)
plt.show()

In [ ]:
ax.get_xticks()

In [ ]:
transposed = _result.T.astype(float)
transposed.head()

In [ ]:
transposed.corr()

In [ ]:
transposed.columns = range(0,len(transposed.columns))
transposed.index = range(0,len(transposed.index))
transposed.head()

In [ ]:
transposed = transposed.iloc[0:10,0:3]
transposed

In [ ]:
transposed = transposed.astype(float)

In [ ]:
type(transposed[0][0])

In [ ]:
transposed.columns = list('ABC')
transposed

In [ ]:
transposed.loc[0, 'A'] = 0
transposed

In [ ]:
transposed.corr()

    data = transposed[[0,1]]
    data.corr(method = 'spearman')


In [ ]:
round(7.64684)

In [ ]:
df = pd.DataFrame(10*np.random.randint(2, size=[20,2]),index=range(0,20),columns=list('AB'))
#df.columns = range(0,len(df.columns))
df.head()
#type(df[0][0])

In [ ]:
type(df.columns)

In [ ]:
df.corr()

In [ ]:
#corr = pd.Series({}, index = methods)
for meth in methods:
    #corr[meth] = result.corr(method = meth)
    print(meth + ":\n" + str(transposed.corr(method = meth)) + "\n\n")

getCrossCorrectAnswers tinkering


In [ ]:

Before

In [ ]:
befores = gform.copy()
befores = befores[befores['Temporality'] == 'before']
print(len(befores))
allBeforesBinarized = getAllBinarized( _source = correctAnswers + demographicAnswers, _form = befores)

In [ ]:
np.unique(allBeforesBinarized.values.flatten())

In [ ]:
allBeforesBinarized.columns[20]

In [ ]:
allBeforesBinarized.T.dot(allBeforesBinarized)

In [ ]:
np.unique(allBeforesBinarized.iloc[:,20].values)

In [ ]:
plotCorrelationMatrix( allBeforesBinarized, _abs=False,\
                      _clustered=False, _questionNumbers=True )

In [ ]:
# Annotated heatmap of the absolute question-to-question correlations, computed on the
# answers selected as 'before'.
_correlation = allBeforesBinarized.astype(float).corr()

# matrix product of the binarized answers with themselves, cast to int; only
# referenced by the commented-out heatmap variant below
overlay = allBeforesBinarized.T.dot(allBeforesBinarized).astype(int)
            
# suffix every label with its 1-based position (the lambda runs before the assignment,
# so get_loc still sees the original labels)
_correlation.columns = pd.Series(_correlation.columns).apply(\
    lambda x: x + ' #' + str(_correlation.columns.get_loc(x) + 1))
_correlation.index = _correlation.columns

# the sign of the correlation is discarded for this plot
_correlation = _correlation.abs()

_fig = plt.figure(figsize=(20,20))
_ax = plt.subplot(111)

#sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True,annot=overlay,fmt='d')
sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True,annot=True)

After

In [ ]:
afters = gform.copy()
afters = afters[afters['Temporality'] == 'after']
print(len(afters))
allAftersBinarized = getAllBinarized( _source = correctAnswers + demographicAnswers, _form = afters)

In [ ]:
np.unique(allAftersBinarized.values.flatten())

In [ ]:
plotCorrelationMatrix( allAftersBinarized, _abs=False,\
                      _clustered=False, _questionNumbers=True )

In [ ]:
#for answerIndex in range(0,len(allAftersBinarized)):
#    print(str(answerIndex) + " " + str(allAftersBinarized.iloc[answerIndex,0]))

In [ ]:
allAftersBinarized.iloc[28,0]

In [ ]:
len(allAftersBinarized)

In [ ]:
len(allAftersBinarized.index)

In [ ]:
# Heatmap of the signed question-to-question correlations, computed on the answers
# selected as 'after' (no abs() and no annotations here).
_correlation = allAftersBinarized.astype(float).corr()

# matrix product of the binarized answers with themselves, cast to int; compared with
# getCrossCorrectAnswers in the following cells
overlay = allAftersBinarized.T.dot(allAftersBinarized).astype(int)

# suffix every label with its 1-based position (the lambda runs before the assignment,
# so get_loc still sees the original labels)
_correlation.columns = pd.Series(_correlation.columns).apply(\
    lambda x: x + ' #' + str(_correlation.columns.get_loc(x) + 1))
_correlation.index = _correlation.columns

_fig = plt.figure(figsize=(10,10))
_ax = plt.subplot(111)

#sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True,annot=overlay,fmt='d')
sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True)

In [ ]:
crossCorrect = getCrossCorrectAnswers(allAftersBinarized)

In [ ]:
pd.Series((overlay == crossCorrect).values.flatten()).unique()

In [ ]:
allAftersBinarized.shape

In [ ]:
cross = allAftersBinarized.T.dot(allAftersBinarized)
cross.shape

In [ ]:
equal = (cross == crossCorrect)
type(equal)

In [ ]:
pd.Series(equal.values.flatten()).unique()

getScore tinkering


In [ ]:
testUser = userIDAnswersFR

In [ ]:
gform[gform[localplayerguidkey] == testUser].T

In [ ]:
getScore(testUser)

In [ ]:
# Inline draft of getScore: count the correct answers of each corrected answer set of
# one user and file the count under the temporality of that answer set.
print("draft test")

# NOTE: the first assignment is immediately overridden by the second one
testUserId = "3ef14300-4987-4b54-a56c-5b6d1f8a24a1"
testUserId = userIDAnswersEN

#def getScore( _userId, _form = gform ):
# single-row frame ('score') with one column per temporality, each cell holding a list
score = pd.DataFrame({}, columns = answerTemporalities)
score.loc['score',:] = np.nan
for column in score.columns:
    score.loc['score', column] = []

if hasAnswered( testUserId ):
    columnAnswers = getCorrections(testUserId)
    for columnName in columnAnswers.columns:
        # only work on corrected columns
        if correctionsColumnNameStem in columnName:
            # the temporality is stored in the answers column matching this corrections column
            answerColumnName = columnName.replace(correctionsColumnNameStem,\
                                                  answersColumnNameStem)
            temporality = columnAnswers.loc['Temporality',answerColumnName]

            # number of True cells in the corrections column; 0 when there is none
            counts = (columnAnswers[columnName]).value_counts()
            thisScore = 0
            if(True in counts):
                thisScore = counts[True]
            # appends to the list stored in the cell (in-place mutation of that list)
            score.loc['score',temporality].append(thisScore)
else:
    print("user " + str(testUserId) + " has never answered")

#expectedScore = 18
#if (expectedScore != score[0]):
#    print("ERROR incorrect score: expected "+ str(expectedScore) +", got "+ str(score))
score

In [ ]:
# One-row frame (row label 'score') with one column per temporality, each cell
# being its own empty list.
# Built in a single constructor call: assigning `[]` through .loc after a NaN
# row enlargement depends on the resulting dtype and on the pandas version.
score = pd.DataFrame(
    {_temporality: [[]] for _temporality in answerTemporalities},
    index=['score'],
    columns=answerTemporalities,
)
score

In [ ]:
#score.loc['user0',:] = [1,2,3]

In [ ]:
#score

In [ ]:
#type(score)

In [ ]:
#type(score[0])

In [ ]:
#for i,v in score[0].iteritems():
#    print(v)

In [ ]:
#score[0]['undefined']

In [ ]:
#columnAnswers.loc['Temporality','answers0']

In [ ]:
# `in` on a Series tests its index, so this checks whether `False` is one of the
# counted values. `columnAnswers` and `columnName` are whatever the getScore
# draft cell above left behind (its last processed column).
False in (columnAnswers[columnName]).value_counts()

In [ ]:
# Score returned by getScore for one specific, hard-coded player GUID.
getScore("3ef14300-4987-4b54-a56c-5b6d1f8a24a1")

In [ ]:
#gform[gform[localplayerguidkey]=="3ef14300-4987-4b54-a56c-5b6d1f8a24a1"].T

In [ ]:
# Display the reference answers that submitted answers are compared against.
correctAnswers

comparison of checkpoints completion and answers

In theory, the two should match: whoever understood an item should be able to beat the matching challenge. Discrepancies are due to game design or level design.

getValidatedCheckpoints tinkering


In [ ]:
#questionnaireValidatedCheckpointsPerQuestion = pd.Series(np.nan, index=range(35))
# One NaN placeholder per entry of checkpointQuestionMatching
# (the commented line is the earlier version with a hard-coded length of 35).
#questionnaireValidatedCheckpointsPerQuestion = pd.Series(np.nan, index=range(35))
questionnaireValidatedCheckpointsPerQuestion = pd.Series(np.nan, index=range(len(checkpointQuestionMatching)))
questionnaireValidatedCheckpointsPerQuestion.head()

In [ ]:
# 'checkpoint' value stored for entry 19 of the question/checkpoint matching table.
checkpointQuestionMatching['checkpoint'][19]

In [ ]:
userId = localplayerguid
_form = gform

# Draft body of getValidatedCheckpoints( userId, _form = gform ):
# for every corrected questionnaire of the user, collect the sorted,
# de-duplicated checkpoints matched to the questions marked `True`.
# `result` ends up as a Series holding one Series of checkpoints per questionnaire.
_validatedCheckpoints = []

if hasAnswered( userId, _form = _form ):
    _columnAnswers = getCorrections( userId, _form = _form)

    for _columnName in _columnAnswers.columns:
        # only work on corrected columns
        if correctionsColumnNameStem in _columnName:
            # object dtype from the start: every slot is overwritten with a string
            # below, and filling a float NaN series with strings would force an
            # implicit dtype upcast on the first assignment
            _questionnaireValidatedCheckpointsPerQuestion = pd.Series(
                np.nan, index=range(len(checkpointQuestionMatching)), dtype=object)

            # checkpoint of each correctly answered question, '' otherwise
            for _index in range(0, len(_questionnaireValidatedCheckpointsPerQuestion)):
                if _columnAnswers[_columnName][_index]==True:
                    _questionnaireValidatedCheckpointsPerQuestion[_index] = checkpointQuestionMatching['checkpoint'][_index]
                else:
                    _questionnaireValidatedCheckpointsPerQuestion[_index] = ''

            # de-duplicate, drop the '' placeholders, sort, renumber from 0
            _questionnaireValidatedCheckpoints = pd.Series(
                _questionnaireValidatedCheckpointsPerQuestion.unique())
            _questionnaireValidatedCheckpoints = _questionnaireValidatedCheckpoints[
                _questionnaireValidatedCheckpoints != '']
            _questionnaireValidatedCheckpoints = \
                _questionnaireValidatedCheckpoints.sort_values().reset_index(drop=True)

            _validatedCheckpoints.append(_questionnaireValidatedCheckpoints)
else:
    print("user " + str(userId) + " has never answered")
result = pd.Series(data=_validatedCheckpoints)

In [ ]:
# Display the per-questionnaire validated checkpoints built in the previous cell.
result

In [ ]:
# Type of the entry stored for the first corrected questionnaire.
type(result[0])

getNonValidated tinkering


In [ ]:
# Sample data for trying out np.setdiff1d: testSeries2 holds two of the four
# checkpoints listed in testSeries1.
testSeries1 = pd.Series([
    'tutorial1.Checkpoint00',
    'tutorial1.Checkpoint01',
    'tutorial1.Checkpoint02',
    'tutorial1.Checkpoint05',
])
testSeries2 = pd.Series([
    'tutorial1.Checkpoint01',
    'tutorial1.Checkpoint05',
])

# The difference is computed on the Series themselves, then on their underlying
# arrays; only the last expression of the cell is displayed.
np.setdiff1d(testSeries1, testSeries2)
np.setdiff1d(testSeries1.values, testSeries2.values)

In [ ]:
# First two rows of the answers of the selected player.
getAnswers(localplayerguid).head(2)

In [ ]:
# First two rows of the corrections of the selected player.
getCorrections(localplayerguid).head(2)

In [ ]:
# Score returned by getScore for the selected player.
getScore(localplayerguid)

In [ ]:
# Result of the getValidatedCheckpoints helper for the selected player.
getValidatedCheckpoints(localplayerguid)

In [ ]:
# Result of the getNonValidatedCheckpoints helper for the selected player.
getNonValidatedCheckpoints(localplayerguid)

getAllAnswerRows tinkering


In [ ]:
# Column position of the question and the list of answers that are selected.
qPlayedHerocoliIndex = 10
qPlayedHerocoliYes = [
    'Yes', 'Once', 'Multiple times',
    'Oui', 'De nombreuses fois', 'Quelques fois', 'Une fois',
]
questionIndex, choice = qPlayedHerocoliIndex, qPlayedHerocoliYes

_form = gform

# Draft body of getAllAnswerRows(questionIndex, choice, _form = gform ):
# keeps the rows of the Google form's answers whose value for question number
# 'questionIndex' is an element of the array 'choice'.
_answeredWithChoice = _form.iloc[:, questionIndex].isin(choice)
_form[_answeredWithChoice]

getPercentCorrectPerColumn tinkering


In [ ]:
_df = getAllAnswerRows(qPlayedHerocoliIndex, qPlayedHerocoliYes, _form = gform )

# Draft body of getPercentCorrectPerColumn(_df):
# for every evaluation question, the fraction of rows whose answer starts with
# the reference answer. Columns outside the evaluation range keep NaN, and a
# final 'Count' entry holds the number of rows.
_count = len(_df)
_percents = pd.Series(np.full(len(_df.columns), np.nan), index=_df.columns)

# Evaluation questions span the column positions
# [firstEvaluationQuestionIndex, len(columns) - 3).
# With no rows at all, every entry stays NaN.
if _count > 0:
    for _columnIndex in range(max(firstEvaluationQuestionIndex, 0), len(_df.columns) - 3):
        _expectedAnswer = str(correctAnswers[_columnIndex])
        # Positional access: a chained label lookup such as _df[column][rowLabel]
        # returns a Series instead of a single answer when row labels repeat.
        _percents.iloc[_columnIndex] = sum(
            str(_answer).startswith(_expectedAnswer)
            for _answer in _df.iloc[:, _columnIndex]
        )

_percents = _percents/_count
_percents['Count'] = _count
print('\n\n\npercents=\n' + str(_percents))

getPercentCorrectKnowingAnswer tinkering


In [ ]:
questionIndex, choice = qPlayedHerocoliIndex, qPlayedHerocoliYes
_form = gform

# Draft body of getPercentCorrectKnowingAnswer(questionIndex, choice, _form = gform):
# select the rows that gave one of the answers in 'choice' to the question, then
# pass them to getPercentCorrectPerColumn.
_answerRows = getAllAnswerRows(questionIndex, choice, _form = _form)
getPercentCorrectPerColumn(_answerRows)

tests on all user Ids, including those who answered more than once


In [ ]:
#localplayerguid = '8d352896-a3f1-471c-8439-0f426df901c1'
#localplayerguid = '7037c5b2-c286-498e-9784-9a061c778609'
#localplayerguid = '5c4939b5-425b-4d19-b5d2-0384a515539e'
#localplayerguid = '7825d421-d668-4481-898a-46b51efe40f0'
#localplayerguid = 'acb9c989-b4a6-4c4d-81cc-6b5783ec71d8'

# Print, for every responder: the first two rows of their answers and of their
# corrections, their scores, and their validated / non-validated checkpoint counts.
# The loop variable is not called `id`, which would shadow the builtin for the
# rest of the kernel session.
for responderId in getAllResponders():
    print("===========================================")
    print("id=" + str(responderId))
    print("-------------------------------------------")
    print(getAnswers(responderId).head(2))
    print("-------------------------------------------")
    print(getCorrections(responderId).head(2))
    print("-------------------------------------------")
    print("scores=" + str(getScore(responderId)))
    print("#ValidatedCheckpoints=" + str(getValidatedCheckpointsCounts(responderId)))
    print("#NonValidatedCheckpoints=" + str(getNonValidatedCheckpointsCounts(responderId)))
    print("===========================================")

In [ ]:
# Only the value of the last expression is displayed by the notebook.
gform[localplayerguidkey]
hasAnswered( '8d352896-a3f1-471c-8439-0f426df901c1' )
# Direct membership test of this GUID among the values of the GUID column.
'8d352896-a3f1-471c-8439-0f426df901c1' in gform[localplayerguidkey].values

In [ ]:
# An apostrophe escaped with a backslash inside a single-quoted string literal.
apostropheTestString = 'it\'s a test'
apostropheTestString

answers submitted through time


In [ ]:

merging answers in English and French

tests


In [ ]:
#gformEN.head(2)

In [ ]:
#gformFR.head(2)

add language column

Scores will be evaluated per language


In [ ]:
#gformEN['Language'] = pd.Series('en', index=gformEN.index)
#gformFR['Language'] = pd.Series('fr', index=gformFR.index)

In [ ]:
#gformFR.head(2)

concatenate


In [ ]:
# rename columns
#gformFR.columns = gformEN.columns
#gformFR.head(2)

In [ ]:
#gformTestMerge = pd.concat([gformEN, gformFR])

In [ ]:
#gformTestMerge.head(2)

In [ ]:
#gformTestMerge.tail(2)

In [ ]:
# Display the form answers frame used throughout this notebook.
gform

In [ ]:
# Display the currently selected player GUID.
localplayerguid

In [ ]:
# Answers of one specific player, looked up by a hard-coded GUID.
someAnswers = getAnswers( '8ca16c7a-70a6-4723-bd72-65b8485a2e86' )
someAnswers

In [ ]:
# Index of the question inspected in the following cells.
testQuestionIndex = 24

In [ ]:
# This user's answer to the test question, taken from their first answers column, as a string.
thisUsersFirstEvaluationQuestion = str(someAnswers[someAnswers.columns[0]][testQuestionIndex])
thisUsersFirstEvaluationQuestion

In [ ]:
# Value of the 'Language' row of the same answers column.
someAnswers[someAnswers.columns[0]]['Language']

In [ ]:
# Reference answer for the same question, as a string.
firstEvaluationQuestionCorrectAnswer = str(correctAnswers[testQuestionIndex])
firstEvaluationQuestionCorrectAnswer

In [ ]:
# Prefix test: does the user's answer start with the reference answer?
thisUsersFirstEvaluationQuestion.startswith(firstEvaluationQuestionCorrectAnswer)